from pyspark import SparkConf
from pyspark import SparkContext


def test1(ite):
    print('test1函数的调用')
    return map(lambda x: x * x, ite).__iter__()


def test2(number, ite):
    print('test2函数的调用')
    print('分区的编号是{}'.format(number))
    return map(lambda x: x * x, ite).__iter__()


if __name__ == '__main__':
    # Build a local Spark driver and run the partition-mapping example.
    spark_conf = SparkConf().setMaster("local[*]").setAppName("spark01")
    spark_ctx = SparkContext(conf=spark_conf)

    # Example 1: 5 elements spread over 2 partitions.
    source_rdd = spark_ctx.parallelize(range(1, 6), 2)

    # mapPartitionsWithIndex passes (partition index, element iterator).
    squared_rdd = source_rdd.mapPartitionsWithIndex(test2)
    # squared_rdd = source_rdd.mapPartitions(test1)

    print(squared_rdd.collect())
